1 Set up

Load packages and set parameters

if (!require("librarian")){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(assertthat, BiocManager, tidyverse, gridExtra, here, mapview, 
  prioritizr, prioritizrdata, raster, remotes, rgeos, rgdal, scales, sf, sp, 
  units)
if (!require("lpsymphony")){
  BiocManager::install("lpsymphony")
  library(lpsymphony)
}

options(scipen = 999)

redo <- FALSE

Data setup

dir_data <- here("data/prioritizr")
pu_shp   <- file.path(dir_data, "pu.shp")
pu_url   <- "https://github.com/prioritizr/massey-workshop/raw/main/data.zip"
pu_zip   <- file.path(dir_data, basename(pu_url))
vegetation_tif <- file.path(dir_data, "vegetation.tif")

dir.create(dir_data, showWarnings = F, recursive = T)
if (!file.exists(pu_shp) | redo){
  download.file(pu_url, pu_zip)
  unzip(pu_zip, exdir = dir_data)
  dir_unzip   <- file.path(dir_data, "data")
  files_unzip <- list.files(dir_unzip, full.names = T)
  file.rename(
    files_unzip, 
    files_unzip %>% str_replace("prioritizr/data", "prioritizr"))
  unlink(c(pu_zip, dir_unzip), recursive = T)
}

2 Data

Data import

# import planning unit data
pu_data <- as(read_sf(pu_shp), "Spatial")

# format columns in planning unit data
pu_data$locked_in <- as.logical(pu_data$locked_in)
pu_data$locked_out <- as.logical(pu_data$locked_out)

# import vegetation data
veg_data <- stack(vegetation_tif)

2.1 Planning unit data

# print a short summary of the data
print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 5
## names       :   id,             cost, status, locked_in, locked_out 
## min values  :  557, 3.59717531470679,      0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1
# plot the planning unit data
plot(pu_data)

# plot an interactive map of the planning unit data
mapview(pu_data)
# print the structure of object
str(pu_data, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  516 obs. of  5 variables:
##   ..@ polygons   :List of 516
##   ..@ plotOrder  : int [1:516] 69 104 1 122 157 190 4 221 17 140 ...
##   ..@ bbox       : num [1:2, 1:2] 348703 5167775 611932 5344516
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
# print the class of the object
class(pu_data)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# print the slots of the object
slotNames(pu_data)
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"
# print the coordinate reference system
print(pu_data@proj4string)
## CRS arguments:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print number of planning units (geometries) in the data
nrow(pu_data)
## [1] 516
# print the first six rows in the data
head(pu_data@data)
##    id     cost status locked_in locked_out
## 1 557 29.74225      0     FALSE      FALSE
## 2 558 29.87703      0     FALSE      FALSE
## 3 574 28.60687      0     FALSE      FALSE
## 4 575 30.83416      0     FALSE      FALSE
## 5 576 38.75511      0     FALSE      FALSE
## 6 577 38.11618      2      TRUE      FALSE
# print the first six values in the cost column of the attribute data
head(pu_data$cost)
## [1] 29.74225 29.87703 28.60687 30.83416 38.75511 38.11618
# print the highest cost value
max(pu_data$cost)
## [1] 47.23834
# print the smallest cost value
min(pu_data$cost)
## [1] 3.597175
# print average cost value
mean(pu_data$cost)
## [1] 26.87393
# plot a map of the planning unit cost data
spplot(pu_data, "cost")

# plot an interactive map of the planning unit cost data
mapview(pu_data, zcol = "cost")

Question: How many planning units are in the planning unit data?
Answer: There are 516 planning units in the unit data.

Question: What is the highest cost value?
Answer: The highest cost value is $ 47.24 in Australian dollars.

Question: Is there a spatial pattern in the planning unit cost values (hint: use plot to make a map)?
Answer: Yes, the planning units with the highest cost tend to be in the northern most central part of Tasmania, followed by the western half of the region.

2.2 Vegetation data

# print a short summary of the data
print(veg_data)
## class      : RasterStack 
## dimensions : 164, 326, 53464, 32  (nrow, ncol, ncell, nlayers)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## names      : vegetation.1, vegetation.2, vegetation.3, vegetation.4, vegetation.5, vegetation.6, vegetation.7, vegetation.8, vegetation.9, vegetation.10, vegetation.11, vegetation.12, vegetation.13, vegetation.14, vegetation.15, ... 
## min values :            0,            0,            0,            0,            0,            0,            0,            0,            0,             0,             0,             0,             0,             0,             0, ... 
## max values :            1,            1,            1,            1,            1,            1,            1,            1,            1,             1,             1,             1,             1,             1,             1, ...
# plot a map of the 20th vegetation class
plot(veg_data[[20]])

# plot an interactive map of the 20th vegetation class
mapview(veg_data[[20]])
# print number of rows in the data
nrow(veg_data)
## [1] 164
# print number of columns  in the data
ncol(veg_data)
## [1] 326
# print number of cells in the data
ncell(veg_data)
## [1] 53464
# print number of layers in the data
nlayers(veg_data)
## [1] 32
# print  resolution on the x-axis
xres(veg_data)
## [1] 967
# print resolution on the y-axis
yres(veg_data)
## [1] 1020
# print spatial extent of the grid, i.e. coordinates for corners
extent(veg_data)
## class      : Extent 
## xmin       : 298636.7 
## xmax       : 613878.7 
## ymin       : 5167756 
## ymax       : 5335036
# print the coordinate reference system
print(veg_data@crs)
## CRS arguments:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print a summary of the first layer in the stack
print(veg_data[[1]])
## class      : RasterLayer 
## band       : 1  (of  32  bands)
## dimensions : 164, 326, 53464  (nrow, ncol, ncell)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## source     : vegetation.tif 
## names      : vegetation.1 
## values     : 0, 1  (min, max)
# print the value in the 800th cell in the first layer of the stack
print(veg_data[[1]][800])
##   
## 0
# print the value of the cell located in the 30th row and the 60th column of
# the first layer
print(veg_data[[1]][30, 60])
##   
## 0
# calculate the sum of all the cell values in the first layer
cellStats(veg_data[[1]], "sum")
## [1] 17
# calculate the maximum value of all the cell values in the first layer
cellStats(veg_data[[1]], "max")
## [1] 1
# calculate the minimum value of all the cell values in the first layer
cellStats(veg_data[[1]], "min")
## [1] 0
# calculate the mean value of all the cell values in the first layer
cellStats(veg_data[[1]], "mean")
## [1] 0.00035239

Question: What part of the study area is the 13th vegetation class found in (hint: make a map)? For instance, is it in the south-eastern part of the study area?
Answer: The 13th vegetation class is found in the eastern part of the studio area with the most density in the north-eastern part of the study area.

# plot a map of the 13th vegetation class
plot(veg_data[[13]])

Question: What proportion of cells contain the 12th vegetation class?

veg_data_freq12 <- freq(veg_data[[12]])

Answer: Of the total cells in the raster (53464), 819 cells contain the 12th vegetation class, making the proportion of the cells that contain the vegetation class 1.53%.

Question: Which vegetation class is the most abundant (i.e. present in the greatest number of cells)?

prop_df <- data.frame()

for (i in 1:nlayers(veg_data)){
 prop <- freq(veg_data[[i]])[[2, 2]] / ncell(veg_data)
 prop_df <- rbind(prop_df, prop)
}

prop_df <- rownames_to_column(prop_df) %>% 
  rename(proportion = X0.000317970971120754)

prop_highest <- prop_df %>% filter(proportion == max(proportion))

Answer: The vegetation class that is most abundant is 12.

3 Gap analysis

3.1 Feature abundance

# create `prioritizr` problem with only the data
p0 <- problem(pu_data, veg_data, cost_column = "cost")

# print empty problem,
# we can see that only the cost and feature data are defined
print(p0)

# calculate amount of each feature in each planning unit
abundance_data <- feature_abundances(p0)

# print abundance data
print(abundance_data)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## # … with 22 more rows
# note that only the first ten rows are printed,
# this is because the abundance_data object is a tibble (i.e. tbl_df) object
# and not a standard data.frame object
print(class(abundance_data))
## [1] "tbl_df"     "tbl"        "data.frame"
# we can print all of the rows in abundance_data like this
print(abundance_data, n = Inf)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## 11 vegetation.11               23.6                  1
## 12 vegetation.12              748.                   1
## 13 vegetation.13              126.                   1
## 14 vegetation.14               10.5                  1
## 15 vegetation.15               17.5                  1
## 16 vegetation.16               15.0                  1
## 17 vegetation.17              213.                   1
## 18 vegetation.18               14.3                  1
## 19 vegetation.19               17.1                  1
## 20 vegetation.20               21.4                  1
## 21 vegetation.21               18.6                  1
## 22 vegetation.22              297.                   1
## 23 vegetation.23               20.3                  1
## 24 vegetation.24              165.                   1
## 25 vegetation.25              716.                   1
## 26 vegetation.26               24.0                  1
## 27 vegetation.27               18.8                  1
## 28 vegetation.28               17.5                  1
## 29 vegetation.29               24.7                  1
## 30 vegetation.30               59.0                  1
## 31 vegetation.31               60.0                  1
## 32 vegetation.32               32                    1
# add new column with feature abundances in km^2
abundance_data$absolute_abundance_km2 <-
  (abundance_data$absolute_abundance * prod(res(veg_data))) %>%
  set_units(m^2) %>%
  set_units(km^2)

# print abundance data
print(abundance_data)
## # A tibble: 32 × 4
##    feature       absolute_abundance relative_abundance absolute_abundance_km2
##    <chr>                      <dbl>              <dbl>                 [km^2]
##  1 vegetation.1                16.0                  1                   15.8
##  2 vegetation.2                14.3                  1                   14.1
##  3 vegetation.3                10.4                  1                   10.2
##  4 vegetation.4                17.8                  1                   17.6
##  5 vegetation.5                13.0                  1                   12.8
##  6 vegetation.6                14.3                  1                   14.1
##  7 vegetation.7                20.0                  1                   19.7
##  8 vegetation.8                14.0                  1                   13.9
##  9 vegetation.9                18.0                  1                   17.8
## 10 vegetation.10               20.0                  1                   19.7
## # … with 22 more rows
# calculate the average abundance of the features
mean(abundance_data$absolute_abundance_km2)
## 86.82948 [km^2]
# plot histogram of the features' abundances
hist(abundance_data$absolute_abundance_km2, main = "Feature abundances")

# find the abundance of the feature with the largest abundance
max(abundance_data$absolute_abundance_km2)
## 737.982 [km^2]
# find the name of the feature with the largest abundance
abundance_data$feature[which.max(abundance_data$absolute_abundance_km2)]
## [1] "vegetation.12"

Question: What is the median abundance of the features (hint: median)?
Answer: The median abundance from the abundance dataset is 19.1165038783561
Question: What is the name of the feature with smallest abundance?
Answer:
Question: How many features have a total abundance greater than 100 km^2 (hint: use sum(abundance_data$absolute_abundance_km2 > set_units(threshold, km^2) with the correct threshold value)?
Answer:

3.2 Feature representation

# create column in planning unit data with binary values (zeros and ones)
# indicating if a planning unit is covered by protected areas or not
pu_data$pa_status <- as.numeric(pu_data$locked_in)

# calculate feature representation by protected areas
repr_data <- eval_feature_representation_summary(p0, pu_data[, "pa_status"])

# print feature representation data
print(repr_data)
## # A tibble: 32 × 5
##    summary feature       total_amount absolute_held relative_held
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>
##  1 overall vegetation.1          16.0         0            0     
##  2 overall vegetation.2          14.3         0            0     
##  3 overall vegetation.3          10.4         0            0     
##  4 overall vegetation.4          17.8         0            0     
##  5 overall vegetation.5          13.0         0            0     
##  6 overall vegetation.6          14.3         0            0     
##  7 overall vegetation.7          20.0         0            0     
##  8 overall vegetation.8          14.0         0            0     
##  9 overall vegetation.9          18.0         0.846        0.0470
## 10 overall vegetation.10         20.0         0            0     
## # … with 22 more rows
# add new column with the areas represented in km^2
repr_data$absolute_held_km2 <-
  (repr_data$absolute_held * prod(res(veg_data))) %>%
  set_units(m^2) %>%
  set_units(km^2)

# print representation data
print(repr_data)
## # A tibble: 32 × 6
##    summary feature       total_amount absolute_held relative_held absolute_held_k…
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>           [km^2]
##  1 overall vegetation.1          16.0         0            0                 0    
##  2 overall vegetation.2          14.3         0            0                 0    
##  3 overall vegetation.3          10.4         0            0                 0    
##  4 overall vegetation.4          17.8         0            0                 0    
##  5 overall vegetation.5          13.0         0            0                 0    
##  6 overall vegetation.6          14.3         0            0                 0    
##  7 overall vegetation.7          20.0         0            0                 0    
##  8 overall vegetation.8          14.0         0            0                 0    
##  9 overall vegetation.9          18.0         0.846        0.0470            0.834
## 10 overall vegetation.10         20.0         0            0                 0    
## # … with 22 more rows

Question: What is the average proportion of the features held in protected areas (hint: use mean(table$relative_held) with the correct table name)?
Answer:
Question: If we set a target of 10% coverage by protected areas, how many features fail to meet this target (hint: use sum(table$relative_held >= target_value) with the correct table name)?
Answer:
Question: If we set a target of 20% coverage by protected areas, how many features fail to meet this target?
Answer:
Question: Is there a relationship between the total abundance of a feature and how well it is represented by protected areas (hint: plot(abundance_data$absolute_abundance ~ repr_data$relative_held))?
Answer:

4 Spatial prioritizations

4.1 Starting out simple

# print planning unit data
print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 6
## names       :   id,             cost, status, locked_in, locked_out, pa_status 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1
# make prioritization problem
p1_rds <- file.path(dir_data, "p1.rds")
if (!file.exists(p1_rds)){
  p1 <- problem(pu_data, veg_data, cost_column = "cost") %>%
        add_min_set_objective() %>%
        add_relative_targets(0.05) %>% # 5% representation targets
        add_binary_decisions() %>%
        add_lpsymphony_solver()
  saveRDS(p1, p1_rds)
}
p1 <- readRDS(p1_rds)

# print problem
print(p1)

# solve problem
s1 <- solve(p1)

# print solution, the solution_1 column contains the solution values
# indicating if a planning unit is (1) selected or (0) not
print(s1)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
# calculate number of planning units selected in the prioritization
eval_n_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
##   summary  cost
##   <chr>   <dbl>
## 1 overall    15
# calculate total cost of the prioritization
eval_cost_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
##   summary  cost
##   <chr>   <dbl>
## 1 overall  385.
# plot solution
# selected = green, not selected = grey
spplot(s1, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s1",
       colorkey = FALSE)

Question: How many planing units were selected in the prioritization? What proportion of planning units were selected in the prioritization?
Answer:
Question: Is there a pattern in the spatial distribution of the priority areas?
Answer:
Question: Can you verify that all of the targets were met in the prioritization (hint: eval_feature_representation_summary(p1, s1[, "solution_1"]))?
Answer:

4.2 Adding complexity

# plot locked_in data
# TRUE = blue, FALSE = grey
spplot(pu_data, "locked_in", col.regions = c("grey80", "darkblue"),
       main = "locked_in", colorkey = FALSE)

# make prioritization problem
p2_rds <- file.path(dir_data, "p2.rds")
if (!file.exists(p2_rds)){
  p2 <- problem(pu_data, veg_data, cost_column = "cost") %>%
      add_min_set_objective() %>%
      add_relative_targets(0.05) %>%
      add_locked_in_constraints("locked_in") %>%
      add_binary_decisions() %>%
      add_lpsymphony_solver()
  saveRDS(p2, p2_rds)
}
p2 <- readRDS(p2_rds)

# print problem
print(p2)

# solve problem
s2 <- solve(p2)

# plot solution
# selected = green, not selected = grey
spplot(s2, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s2",
       colorkey = FALSE)

Let’s pretend that we talked to an expert on the vegetation communities in our study system and they recommended that a 10% target was needed for each vegetation class. So, equipped with this information, let’s set the targets to 10%.

# make prioritization problem
p3_rds <- file.path(dir_data, "p3.rds")
if (!file.exists(p3_rds)){
  p3 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p3, p3_rds)
}
p3 <- readRDS(p3_rds)

# print problem
print(p3)

# solve problem
s3 <- solve(p3)

# plot solution
# selected = green, not selected = grey
spplot(s3, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s3",
       colorkey = FALSE)

Next, let’s lock out highly degraded areas. Similar to before, this information is present in our planning unit data so we can use the locked_out column name to achieve this.

# plot locked_out data
# TRUE = red, FALSE = grey
spplot(pu_data, "locked_out", col.regions = c("grey80", "darkred"),
       main = "locked_out", colorkey = FALSE)

# make prioritization problem
p4_rds <- file.path(dir_data, "p4.rds")
if (!file.exists(p4_rds)){
  p4 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p4, p4_rds)
}
p4 <- readRDS(p4_rds)
# print problem
print(p4)

# solve problem
s4 <- solve(p4)

# plot solution
# selected = green, not selected = grey
spplot(s4, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s4",
       colorkey = FALSE)

Question: What is the cost of the planning units selected in s2, s3, and s4?
Answer:
Question: How many planning units are in s2, s3, and s4?
Answer:
Question: Do the solutions with more planning units have a greater cost? Why (or why not)?
Answer:
Question: Why does the first solution (s1) cost less than the second solution with protected areas locked into the solution (s2)?
Answer:
Question: Why does the third solution (s3) cost less than the fourth solution solution with highly degraded areas locked out (s4)?
Answer:

4.3 Penalizing fragmentation

# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds)){
  p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.001) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)

# print problem
print(p5)

# solve problem,
# note this will take a bit longer than the previous runs
s5 <- solve(p5)

# print solution
print(s5)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
# plot solution
# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
       colorkey = FALSE)

Question: What is the cost the fourth (s4) and fifth (s5) solutions? Why does the fifth solution (s5) cost more than the fourth (s4) solution?
Answer:
Question: Try setting the penalty value to 0.000000001 (i.e. 1e-9) instead of 0.001. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this is a useful penalty value? Why (or why not)?
Answer:
Question:
Answer: Try setting the penalty value to 0.5. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this a useful penalty value? Why (or why not)?